<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package	Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license	http://framework.zend.com/license/new-bsd	 New BSD License
 */


/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';


/**
 * @category   Zend
 * @package	Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license	http://framework.zend.com/license/new-bsd	 New BSD License
 */
class Zend_Search_Lucene_Search_QueryToken
{
	/**
	 * Token types.
	 */
	const TT_WORD				 = 0;  // Word
	const TT_PHRASE			   = 1;  // Phrase (one or several quoted words)
	const TT_FIELD				= 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
	const TT_FIELD_INDICATOR	  = 3;  // ':'
	const TT_REQUIRED			 = 4;  // '+'
	const TT_PROHIBITED		   = 5;  // '-'
	const TT_FUZZY_PROX_MARK	  = 6;  // '~'
	const TT_BOOSTING_MARK		= 7;  // '^'
	const TT_RANGE_INCL_START	 = 8;  // '['
	const TT_RANGE_INCL_END	   = 9;  // ']'
	const TT_RANGE_EXCL_START	 = 10; // '{'
	const TT_RANGE_EXCL_END	   = 11; // '}'
	const TT_SUBQUERY_START	   = 12; // '('
	const TT_SUBQUERY_END		 = 13; // ')'
	const TT_AND_LEXEME		   = 14; // 'AND' or 'and'
	const TT_OR_LEXEME			= 15; // 'OR'  or 'or'
	const TT_NOT_LEXEME		   = 16; // 'NOT' or 'not'
	const TT_TO_LEXEME			= 17; // 'TO'  or 'to'
	const TT_NUMBER			   = 18; // Number, like: 10, 0.8, .64, ....


	/**
	 * Returns all possible lexeme types.
	 * It's used for syntax analyzer state machine initialization
	 *
	 * @return array
	 */
	public static function getTypes()
	{
		return array(   self::TT_WORD,
						self::TT_PHRASE,
						self::TT_FIELD,
						self::TT_FIELD_INDICATOR,
						self::TT_REQUIRED,
						self::TT_PROHIBITED,
						self::TT_FUZZY_PROX_MARK,
						self::TT_BOOSTING_MARK,
						self::TT_RANGE_INCL_START,
						self::TT_RANGE_INCL_END,
						self::TT_RANGE_EXCL_START,
						self::TT_RANGE_EXCL_END,
						self::TT_SUBQUERY_START,
						self::TT_SUBQUERY_END,
						self::TT_AND_LEXEME,
						self::TT_OR_LEXEME,
						self::TT_NOT_LEXEME,
						self::TT_TO_LEXEME,
						self::TT_NUMBER
					 );
	}


	/**
	 * TokenCategories
	 */
	const TC_WORD		   = 0;   // Word
	const TC_PHRASE		 = 1;   // Phrase (one or several quoted words)
	const TC_NUMBER		 = 2;   // Nubers, which are used with syntax elements. Ex. roam~0.8
	const TC_SYNTAX_ELEMENT = 3;   // +  -  ( )  [ ]  { }  !  ||  && ~ ^


	/**
	 * Token type.
	 *
	 * @var integer
	 */
	public $type;

	/**
	 * Token text.
	 *
	 * @var integer
	 */
	public $text;

	/**
	 * Token position within query.
	 *
	 * @var integer
	 */
	public $position;


	/**
	 * IndexReader constructor needs token type and token text as a parameters.
	 *
	 * @param integer $tokenCategory
	 * @param string  $tokText
	 * @param integer $position
	 */
	public function __construct($tokenCategory, $tokenText, $position)
	{
		$this->text	 = $tokenText;
		$this->position = $position + 1; // Start from 1

		switch ($tokenCategory) {
			case self::TC_WORD:
				if (  strtolower($tokenText) == 'and') {
					$this->type = self::TT_AND_LEXEME;
				} else if (strtolower($tokenText) == 'or') {
					$this->type = self::TT_OR_LEXEME;
				} else if (strtolower($tokenText) == 'not') {
					$this->type = self::TT_NOT_LEXEME;
				} else if (strtolower($tokenText) == 'to') {
					$this->type = self::TT_TO_LEXEME;
				} else {
					$this->type = self::TT_WORD;
				}
				break;

			case self::TC_PHRASE:
				$this->type = self::TT_PHRASE;
				break;

			case self::TC_NUMBER:
				$this->type = self::TT_NUMBER;
				break;

			case self::TC_SYNTAX_ELEMENT:
				switch ($tokenText) {
					case ':':
						$this->type = self::TT_FIELD_INDICATOR;
						break;

					case '+':
						$this->type = self::TT_REQUIRED;
						break;

					case '-':
						$this->type = self::TT_PROHIBITED;
						break;

					case '~':
						$this->type = self::TT_FUZZY_PROX_MARK;
						break;

					case '^':
						$this->type = self::TT_BOOSTING_MARK;
						break;

					case '[':
						$this->type = self::TT_RANGE_INCL_START;
						break;

					case ']':
						$this->type = self::TT_RANGE_INCL_END;
						break;

					case '{':
						$this->type = self::TT_RANGE_EXCL_START;
						break;

					case '}':
						$this->type = self::TT_RANGE_EXCL_END;
						break;

					case '(':
						$this->type = self::TT_SUBQUERY_START;
						break;

					case ')':
						$this->type = self::TT_SUBQUERY_END;
						break;

					case '!':
						$this->type = self::TT_NOT_LEXEME;
						break;

					case '&&':
						$this->type = self::TT_AND_LEXEME;
						break;

					case '||':
						$this->type = self::TT_OR_LEXEME;
						break;

					default:
						throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
				}
				break;

			case self::TC_NUMBER:
				$this->type = self::TT_NUMBER;

			default:
				throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
		}
	}
}

